In [54]:
import sys
!{sys.executable} -m pip install plotly
!{sys.executable} -m pip install Dash
!{sys.executable} -m pip install jupyter-dash
Requirements already satisfied
In [105]:
import sys
!{sys.executable} -m pip install hide_code
!{sys.executable} -m jupyter nbextension install --py --user hide_code
!{sys.executable} -m jupyter nbextension enable --py --user hide_code
!{sys.executable} -m jupyter serverextension enable --py --user hide_code
Requirements already satisfied
Installing C:\Users\tomas\anaconda3\lib\site-packages\hide_code\ done

Data Load/Prep¶

In [3]:
import pandas as pd
import numpy as np
import jupyter_dash as JupyterDash
import plotly.express as px
import os
In [10]:
current_path = os.getcwd()
files = os.listdir(current_path + '\\Data\\')
df = pd.DataFrame()
for file in files:
    temp = pd.read_csv(current_path + '\\Data\\'+ file)
    temp['file_name'] = file
    df = pd.concat([df,temp])
df[['city','type']] = df.file_name.str.split('_', expand = True)
df.type = df.type.str[:-4]
df = df.drop(columns = 'file_name')
In [11]:
print(df.columns)
print(len(df))
Index(['Unnamed: 0', 'realSum', 'room_type', 'room_shared', 'room_private',
       'person_capacity', 'host_is_superhost', 'multi', 'biz',
       'cleanliness_rating', 'guest_satisfaction_overall', 'bedrooms', 'dist',
       'metro_dist', 'attr_index', 'attr_index_norm', 'rest_index',
       'rest_index_norm', 'lng', 'lat', 'city', 'type'],
      dtype='object')
51707

Interactive Plotly Graphs¶

In [12]:
import plotly.graph_objects as go
import plotly.io as pio
pio.renderers.default = "notebook_connected"

fig1 = go.Figure()

cities = df["city"].drop_duplicates()


fig1.add_trace(go.Violin(x=df['city'][df['type'] == 'weekdays'],
                        y=df['realSum'][df['type'] == 'weekdays'],
                        legendgroup='weekdays',
                     
                        name='weekdays',
                        side='negative',
                        line_color='blue',
                        box_visible=True,
                        meanline_visible=True))


fig1.add_trace(go.Violin(x=df['city'][df['type'] == 'weekends'],
                    y=df['realSum'][df['type'] == 'weekends'],
                    legendgroup='weekends',
      
                    name='weekends',
                    side='positive',
                    line_color='orange',
                    box_visible=True,
                    meanline_visible=True))


#added spanmode to limit the density (before it was showing negative values due to kernel density estimation)
fig1.update_traces(meanline_visible=True, spanmode = 'hard')
fig1.update_layout(violingap=0, violingroupgap=0, violinmode='overlay', title ='AirBnB price distribution per city and weekday/weekend')
fig1.show()

We can see that outliers of expensive AirBnB locations make the graph less readable.

However, because the graph is interactive we can zoom in on the section of the graph that doesn't include outliers by selecting specific area on the graph.

  • We can also zoom into specific cities
  • Hovering over each corresponding violin plot will also show us key labels for min/max/mean/median and interquartile ranges of prices for each city

However let's plot the same graph after dropping outliers with values that are in the 0.5% of top values

In [13]:
# search for outliers, which are largely represented at price data
for city in df.city.unique():
    df.loc[df.city == city, 'outliers'] = df.loc[df.city == city, 'realSum'] > df.loc[df.city == city, 'realSum'].quantile(0.995)

dflen1=len(df)
df1 = df.drop(df[df.outliers == True].index)
dflen2=len(df1)
print(f'Dropped ' + str(dflen1-dflen2) + ' outliers')
Dropped 3636 outliers
In [14]:
import plotly.graph_objects as go
import plotly.io as pio
pio.renderers.default = "notebook_connected"

fig1 = go.Figure()

cities = df1["city"].drop_duplicates()


fig1.add_trace(go.Violin(x=df1['city'][df1['type'] == 'weekdays'],
                        y=df1['realSum'][df1['type'] == 'weekdays'],
                        legendgroup='weekdays',
                     
                        name='weekdays',
                        side='negative',
                        line_color='blue',
                        box_visible=True,
                        meanline_visible=True))


fig1.add_trace(go.Violin(x=df1['city'][df1['type'] == 'weekends'],
                    y=df1['realSum'][df1['type'] == 'weekends'],
                    legendgroup='weekends',
      
                    name='weekends',
                    side='positive',
                    line_color='orange',
                    box_visible=True,
                    meanline_visible=True))


#added spanmode to limit the density (before it was showing negative values due to kernel density estimation)
fig1.update_traces(meanline_visible=True, spanmode = 'hard')
fig1.update_layout(violingap=0, violingroupgap=0, violinmode='overlay', title ='AirBnB price distribution per city and weekday/weekend')
fig1.show()

Animated Graph in Plotly¶

Let's create an animated graph which is packaged into a Dash App & deployed in GCP.

In [15]:
df4 = pd.pivot_table(df, values = ['realSum','guest_satisfaction_overall'],index=['person_capacity','city'], aggfunc=np.mean)
df5=df4.reset_index()
df5.to_csv(file,index=False)
df6=pd.read_csv('vienna_weekends.csv')
In [17]:
df6.head()
Out[17]:
person_capacity city guest_satisfaction_overall realSum
0 2.0 amsterdam 94.380914 417.738213
1 2.0 athens 94.278374 131.160254
2 2.0 barcelona 91.584144 226.304795
3 2.0 berlin 95.088330 202.941984
4 2.0 budapest 94.459916 139.075117
In [18]:
#setup which works only works locally, we had to deploy it online
app3 = JupyterDash(__name__)


app3.layout = html.Div([
    html.H4('Animated AirBnB Prices'),
    html.P("Select an animation:"),
    dcc.RadioItems(
        id='selection',
        options=["Bar","Scatter"],
        value='Bar',
    ),
    dcc.Loading(dcc.Graph(id="graph"), type="cube")
])


@app3.callback(
    Output("graph", "figure"), 
    Input("selection", "value"))

def display_animated_graph(selection):
    animations = {'Scatter': px.scatter(
            df6, x="guest_satisfaction_overall", y="realSum", animation_frame="person_capacity", 
            animation_group="city", size='realSum', color="city", 
            hover_name="realSum", size_max=50, 
            range_x=[80,110], range_y=[0,2000]),

        'Bar': px.bar(
            df6, x="city", y="realSum", 
            animation_frame="person_capacity", animation_group="city", 
            range_y=[0,2000]),
    }
    return animations[selection]


app3.run_server(mode='inline',debug=True, port=1235)

App has been deployed on Google Cloud Platform via Docker.

You can access it here

Map Visualizations in Plotly¶

Let's create a map of AirBnB locations in Rome and visually show how far the locations are from the closest metro station.

In [19]:
px.set_mapbox_access_token('myaccesstoken')
In [58]:
rome = df[(df["city"]=='rome') & (df["type"]=='weekdays')]

First, let's understand the distribution of distances in order to define our buckets. (adding a breakdown regarding superhost status just for fun.)

In [59]:
import plotly.express as px

fig = px.histogram(rome, x="metro_dist",  color='host_is_superhost', marginal="rug", # can be `box`, `violin`
                         hover_data=rome.columns)
fig.show()
In [74]:
import warnings
warnings.filterwarnings("ignore")
# Creating a new column with bins showing the distance from the nearest metro station
cut_labels_5 = ['Very Close (<0.25km)', 'Close (0.25-0.5km)', 'Walkable (0.5km-1.25km)', 'Far (1.25km-2.5km)', 'Very Far (>2.5km)']
cut_bins = [0, 0.25, 0.5, 1.25, 2.5, 5]
rome['Distance'] = pd.cut(rome['metro_dist'], bins=cut_bins, labels=cut_labels_5)

The map shows all weekday AirBnB locations in Rome with different colors based on the distance from the closest metro station. The size of each bubble coresponds to the price of the AirBnB. You can hover over each value for more information.

In [76]:
import plotly.express as px

fig = px.scatter_mapbox(rome, lat='lat', lon='lng',   color="Distance", size="realSum",
                  color_continuous_scale=px.colors.cyclical.IceFire, size_max=15, zoom=10,  
                        category_orders={"Distance": ['Very Close (<0.25km)', 'Close (0.25-0.5km)', 'Walkable (0.5km-1.25km)', 'Far (1.25km-2.5km)', 'Very Far (>2.5km)']})
fig.show()